// countyControls.do
// Inputs: ZIP_COUNTY_032014.xlsx, US_FIPS_Codes.xls, populationincome, Yale_2014.xlsx, countypres_2000-2016.csv, acs_YYYY_5yr.csv, us.1990_2020.singleages.txt, laucntyYYYY.xlsx, seda_county_long_gcs_4.1, ucr_arrests_monthly_all_crimes_race_sex_YYYY, buildYYYY, StormEvents_details-ftp_v1.0_dYYYY_c20220425
// Outputs: Yale_Data_2014, Yale_Buyer_2014, ZIP_COUNTY_CW_2014, election_EC, countyincome, countypopulation, buyer_countyincome, buyer_countypopulation, buyer_countyeducation, SEER_Population, county_2005_2016, unemp_2001_2016, tests_2009_2016, crime_2001_2016, build_2001_2016, flood_2001_2016
// Date last updated: 1/27/2025

********************************************************************************	
*  Crosswalk ZIP to FIPS for 2014 (year which we use climate beliefs)
********************************************************************************
// Download files from https://www.huduser.gov/portal/datasets/usps_crosswalk.html

// Import data
	import excel "$pathr\ZIP_COUNTY_032014.xlsx", firstrow clear

// Rename variables
	ren ZIP buyer_zip_dw
	ren COUNTY buyer_fips
	ren RES_RATIO res_ratio

// Sort descending by res ratio and drop perfect duplicates
	gsort buyer_zip_dw -res_ratio
	duplicates drop buyer_zip_dw, force

// Keep necessary vars and save
	keep buyer_zip_dw buyer_fips
	save "$pathi\ZIP_COUNTY_CW_2014.dta", replace
	
********************************************************************************	
*  Crosswalk between county FIPS code and county name
********************************************************************************
// Download list of county names with FIPS codes from MDR Education
	* (https://mdreducation.com/pdfs/US_FIPS_Codes.xls)
	
// Import file starting at second row so can use variable names
	import excel "$pathr\US_FIPS_Codes.xls", clear cellrange(A2) firstrow
	
// Rename vars
	rename CountyName county
	rename State state
	
// Clean names for later matching
	replace county = strlower(county)
	replace state = strlower(state)
	
// Make one FIPS code
	gen fips_code = FIPSState + FIPSCounty
	destring fips_code, replace
	drop FIPSState FIPSCounty
	
// Order and save
	order fips_code county state
	save "$pathi\fips county name crosswalk.dta", replace

********************************************************************************	
*  Clean Yale Climate Belief data
********************************************************************************
// Access to Yale's Climate Opinion Survey of 2014 can be found here: https://climatecommunication.yale.edu/visualizations-data/ycom-us/

// Import raw data from Yale 
	import excel "$pathr\Yale_2014.xlsx", sheet("Sheet2") firstrow

// Keep necessary vars, rename and save
	keep Countyname Countycode happening worried timing
	ren Countycode fips_code
	save "$pathr\Yale_Data_2014.dta", replace

// Save different version for merging with buyer county
	ren fips_code buyer_fips
	save "$pathr\Yale_Buyer_2014.dta", replace

********************************************************************************	
*  Clean buyer county share that voted Democrat
********************************************************************************	
// Shares of people that voted Democrat vs Republican vs independent, sourced from the MIT Election Lab
// County presidential election returns from 2000-2020: (https://electionlab.mit.edu/data)

// Import raw data	
	import delimited "$pathr\countypres_2000-2016.csv", clear

// Make variables for share of democrats, republicans, and independents
	gen demvote = candidatevotes if party == "democrat"
	destring demvote, replace force
	gen demshare = demvote/totalvotes
	
	gen gopvote = candidatevotes if party == "republican"
	destring gopvote, replace force
	gen gopshare = gopvote/totalvotes
	
	gen indvote = candidatevotes if party == "NA"
	destring indvote, replace force
	gen indshare = indvote/totalvotes
	
// Keep relevant nonmissing vars, rename some	
	keep county year FIPS demshare gopshare indshare
	rename FIPS buyer_fips
	
// Consolidate to one obs per county-year
	foreach p in dem gop ind{
		bys buyer_fips year: egen `p'share1 = max(`p'share)
		drop `p'share
		rename `p'share1 `p'share
	}
	
	bys year county: keep if _n==1
	
// Specify election year	
	gen electionyear = year 
	drop year
	
// Clean and save
	destring buyer_fips, replace force
	drop if buyer_fips == .
	sort buyer_fips electionyear
	save "$pathi\election_EC", replace

********************************************************************************	
*  Clean population income data to for both property and buyer county
********************************************************************************
// This runs essentially the same code 4 times to import, clean, and save different versions of the data for population and income for both property and buyer county
// Raw data is from table CAINC1 from the BEA's Regional Economic Accounts 
	* https://apps.bea.gov/histdatacore/HistFileDetails.html?HistCateID=5&FileGroupID=294

// Create income (property county)
	import delimited "$pathr\populationincome", clear
	keep if unit == "Dollars"
	reshape long v, i(geofips) j(year)
	replace year = year + 1960
	replace geofips = subinstr(geofips, `"""',  "", .)
	rename geofips fips_code
	destring fips_code, replace
	rename v income
	drop geoname region tablename linecode industryclassification unit description
	sort fips_code year
	save "$pathi\countyincome", replace

// Create population (property county)
	import delimited "$pathr\populationincome", clear
	keep if unit == "Number of persons"
	reshape long v, i(geofips) j(year)
	replace year = year + 1960
	replace geofips = subinstr(geofips, `"""',  "", .)
	rename geofips fips_code
	destring fips_code, replace
	rename v population
	drop geoname region tablename linecode industryclassification unit description
	sort fips_code year
	save "$pathi\countypopulation", replace

// Create income (buyer county)
	import delimited "$pathr\populationincome", clear
	keep if unit == "Dollars"
	reshape long v, i(geofips) j(year)
	replace year = year + 1960
	replace geofips = subinstr(geofips, `"""',  "", .)
	rename geofips buyer_fips
	destring buyer_fips, replace
	rename v buyer_income
	drop geoname region tablename linecode industryclassification unit description
	sort buyer_fips year
	save "$pathi\buyer_countyincome", replace

// Create population (buyer county)
	import delimited "$pathr\populationincome", clear
	keep if unit == "Number of persons"
	reshape long v, i(geofips) j(year)
	replace year = year + 1960
	replace geofips = subinstr(geofips, `"""',  "", .)
	rename geofips buyer_fips
	destring buyer_fips, replace
	rename v buyer_population
	drop geoname region tablename linecode industryclassification unit description
	sort buyer_fips year
	save "$pathi\buyer_countypopulation", replace
	
********************************************************************************	
*  Buyer County Education
********************************************************************************
// The data is from table B15001 from the American Community Survey
/* // Detailed instructions for download; this code will not work if table is not in certain format
	
	* If downloading it raw: search for table B15001, turn off margin of error, select geography of State -> County, and transpose the table. Then download each year as a CSV.
	* A quicker way: follow the link below and just change the year each time to download
	* https://data.census.gov/table/ACSDT5Y2010.B15001?q=b15001&g=010XX00US$0500000&moe=false&tp=true
*/

// Note - after download, save each year's file as "acs_YYYY_5yr.csv" to loop through them

// Loop through all years to clean strings into variables we want and create shares
forval x=2010(1)2016 {
	import delimited "$pathr\acs_`x'_5yr.csv", varnames(1) clear 

	* Create county variable 
	gen county=labelgrouping if total==""
	replace county = county[_n-1] if county == ""

	* Remove commas in total var
	replace total = subinstr(total, ",", "", .)
	
	* Remove commas, looping through every variable combo (gender, age group, education level)
	foreach g in male female {
		* Remove , in just the total for each gender
		replace total`g' = subinstr(total`g', ",", "", .)
		
		foreach a in 18to24years 25to34years 35to44years 45to64years 65yearsandover {
			foreach l in "" lesstha 9thto12 highsch associa somecol bachelo graduat {
				replace total`g'`a'`l' = subinstr(total`g'`a'`l', ",", "", .)
			}
		}
	}

	* Destring all variables
	destring, replace

	* Create hs or less variable 
	gen hs_or_less=totalmale18to24yearslessthan9thg + totalmale18to24years9thto12thgra + totalmale18to24yearshighschoolgr + totalmale25to34yearslessthan9thg + totalmale25to34years9thto12thgra + totalmale25to34yearshighschoolgr + totalmale35to44years9thto12thgra + totalmale35to44yearslessthan9thg + totalmale35to44yearshighschoolgr + totalmale45to64yearslessthan9thg + totalmale45to64years9thto12thgra + totalmale45to64yearshighschoolgr + totalmale65yearsandoverlessthan9 + totalmale65yearsandover9thto12th + totalmale65yearsandoverhighschoo + totalfemale18to24yearslessthan9t + totalfemale18to24years9thto12thg + totalfemale18to24yearshighschool + totalfemale25to34yearslessthan9t + totalfemale25to34years9thto12thg + totalfemale25to34yearshighschool + totalfemale35to44yearslessthan9t + totalfemale35to44years9thto12thg + totalfemale35to44yearshighschool + totalfemale45to64yearslessthan9t + totalfemale45to64years9thto12thg + totalfemale45to64yearshighschool + totalfemale65yearsandoverlesstha + totalfemale65yearsandover9thto12 + totalfemale65yearsandoverhighsch

	* Create share hs or less variable
	gen share_hs_or_less=hs_or_less/total

	* Create some college variable
	gen some_col=totalmale18to24yearssomecollegen + totalmale18to24yearsassociatesde + totalmale25to34yearssomecollegen + totalmale25to34yearsassociatesde + totalmale35to44yearssomecollegen + totalmale35to44yearsassociatesde + totalmale45to64yearssomecollegen + totalmale45to64yearsassociatesde + totalmale65yearsandoversomecolle + totalmale65yearsandoverassociate + totalfemale18to24yearssomecolleg + totalfemale18to24yearsassociates + totalfemale25to34yearssomecolleg + totalfemale25to34yearsassociates + totalfemale35to44yearssomecolleg + totalfemale35to44yearsassociates + totalfemale45to64yearssomecolleg + totalfemale45to64yearsassociates + totalfemale65yearsandoversomecol + totalfemale65yearsandoverassocia

	* Create share some college variable
	gen share_some_col=some_col/total

	*create bachelors degree only variable 
	gen bachelors=totalmale18to24yearsbachelorsdeg + totalmale25to34yearsbachelorsdeg + totalmale35to44yearsbachelorsdeg + totalmale45to64yearsbachelorsdeg + totalmale65yearsandoverbachelors + totalfemale18to24yearsbachelorsd + totalfemale25to34yearsbachelorsd + totalfemale35to44yearsbachelorsd + totalfemale45to64yearsbachelorsd + totalfemale65yearsandoverbachelo

	* Create share bachelors variable 
	gen share_bachelors=bachelors/total

	* Create post grad variable 
	gen post_grad=totalmale18to24yearsgraduateorpr + totalmale25to34yearsgraduateorpr + totalmale35to44yearsgraduateorpr + totalmale45to64yearsgraduateorpr + totalmale65yearsandovergraduateo + totalfemale18to24yearsgraduateor + totalfemale25to34yearsgraduateor + totalfemale35to44yearsgraduateor + totalfemale45to64yearsgraduateor + totalfemale65yearsandovergraduat

	* Create share post grad variable 
	gen share_post_grad=post_grad/total 

	* Remove unnecessary rows
	drop if hs_or_les==. & some_col==. & bachelors==. & post_grad==.

	* Get rid of the word "county" in county variable 
	gen splitat=ustrpos(county, " County, ")
	replace splitat=ustrpos(county, " Borough, ") if splitat==0
	replace splitat=ustrpos(county, " Census Area, ") if splitat==0
	replace splitat=ustrpos(county, " Parish, ") if splitat==0
	replace splitat=ustrpos(county, " Municipio, ") if splitat==0
	replace splitat=ustrpos(county, " City and Borough, ") if splitat==0
	replace splitat=ustrpos(county, " Municipality, ") if splitat==0
	replace splitat=ustrpos(county, ", ") if splitat==0

	* Create separate state and county variables
	gen county2=usubstr(county, 1, splitat-1)
	gen state=usubstr(county, splitat+1, .)
	replace state = usubstr(state,ustrpos(state," ") + 1,.) 
	replace state=subinstr(state, "Area, ", "", .)
	replace state=lower(state)
	rename county place
	rename county2 county
	replace county=lower(county)

	* Keep only necessary variables 
	keep place hs_or_less share_hs_or_less some_col share_some_col bachelors share_bachelors post_grad share_post_grad county state total

	* Create year variable 
	gen year=`x'

	* Match to FIPS code county names 
	drop if state=="undefined"
	drop if state=="puerto rico"
	replace county=subinstr(county, ".", "", .)
	replace county=subinstr(county, "'", "", .)
	replace county="de kalb" if county=="dekalb"
	replace county="de soto" if county=="desoto"
	replace county="de witt" if county=="dewitt"
	replace county="du page" if county=="dupage"
	replace county="o brien" if county=="obrien"
	replace county="la salle" if county=="lasalle"
	replace county="la porte" if county=="laporte"
	replace county="la moure" if county=="lamoure"
	replace county="la grange" if county=="lagrange"
	replace county="dona ana" if county=="do?a ana"
	replace county=subinstr(county, " city and", "", .)
	replace county="prince of wales-outer ketchikan" if county=="prince of wales-hyder"

	* Merge with FIPS codes 
	merge 1:1 county state using "$pathi\fips county name crosswalk.dta"
	drop if _merge !=3
	drop _merge
	tempfile acs`x'
	save `acs`x''

}

// Append all years 
	clear
	forval x=2010(1)2016 {
		append using `acs`x''
	}
	
// Rename FIPS code for buyer county merging
	rename fips_code buyer_fips
	save "$pathi\buyer_countyeducation.dta", replace


********************************************************************************	
*  Demographic Data
********************************************************************************
// Demographic data from SEER (the National Cancer Institute's Surveillance, Epidemiology, and End Results Program); annual county-level population files for single-year age groups, 4 expanded races by origin
	* https://seer.cancer.gov/popdata/download.html#single
	
// Clean SEER data
	import delimited "$pathr\us.1990_2020.singleages.txt", clear
	
// Create separate variables from single variable in data
	gen year = substr(v1,1,4)
	destring year, replace
	gen state_abb = substr(v1,5,2)
	gen state_fips = substr(v1,7,2)
	destring state_fips, replace
	gen county_fips = substr(v1,9,3)
	destring county_fips, replace
	gen registry = substr(v1,12,2)
	destring registry, replace
	label variable registry "Registry (Geographic Definition Only). See codebook."
	gen race = substr(v1,14,1)
	destring race, replace
	label variable race "1 = white, 2 = Black, 3 = Am.Indian/Alaska 4 = Asian/PacIs"
	gen origin = substr(v1,15,1)
	destring origin, replace
	label variable origin "0=Non-Hispanic,1=Hispanic,9=Not applicable"
	gen sex = substr(v1,16,1)
	destring sex, replace
	label variable sex "1=Male,2=Female"
	gen age = substr(v1,17,2)
	destring age, replace
	label variable age "00=0 years,01=1-4years,-2=5-9years...18=85+ See codebook"
	gen population = substr(v1,19,8)
	destring population, replace

// Make FIPS code match FIPS codes in other data
	gen fips_code=substr(v1, 7, 5)
	destring fips_code, replace

// Collapse data to county-year level for gender
	preserve
	collapse (sum) population, by(year fips_code sex)
	gen female = 0
	replace female=1 if sex==2
	gen male = 1-female

// Create female/male population counts
	gen population_female = population*female
	gen population_male = population*male

// Collapse data to have one observation per county-year
	collapse (sum) population_female population_male, by(year fips_code)

// Create total county population variable
	gen total_county_population = population_female+ population_male

// Create share male/female variables
	gen pop_share_female = population_female/total_county_population
	gen pop_share_male = population_male/total_county_population

	tempfile SEER_Population_sex
	save `SEER_Population_sex'
	restore

// Collapse data to county-year level for race
	preserve
	collapse (sum) population, by(year fips_code race origin)

// Create race dummies
	gen white =(race==1 & origin==0)
	gen Black = (race==2 & origin==0)
	gen Asian=(race==4 & origin==0)
	gen other=(race==3 & origin==0)
	gen Hispanic=(origin==1)

// Create total population by race variables
	gen population_white = population*white
	gen population_Black = population*Black
	gen population_other = population*other
	gen population_Asian = population*Asian
	gen population_Hispanic = population*Hispanic 

// Collapse data to have one observation per county-year
	collapse (sum) population_white population_Black population_other population_Asian population_Hispanic, by(year fips_code)

// Create total population per county-year variable
	gen total_county_population = population_white + population_Black + population_other + population_Asian + population_Hispanic

// Create share of each race in population variables
	gen pop_share_white = population_white/total_county_population
	gen pop_share_Black = population_Black/total_county_population
	gen pop_share_other = population_other/total_county_population
	gen pop_share_Asian = population_Asian/total_county_population
	gen pop_share_Hispanic = population_Hispanic/total_county_population

	tempfile SEER_Population_race
	save `SEER_Population_race'
	restore

// Collapse data to county-year level for age
	collapse (sum) population, by(year fips_code age)

// Create age categories to match gallup data
	gen age_18_29 = (age >=18 & age <=29)
	gen age_30_49 =(age>=30 & age<=49)
	gen age_50_64 =(age>=50 & age<=64)
	gen age_65plus = (age>=65)
	gen age_other=(age_18_29==0 & age_30_49==0 & age_50_64==0 & age_65plus==0)

// Create population counts by age category
	gen population_age_18_29 = age_18_29*population
	gen population_age_30_49 = age_30_49*population
	gen population_age_50_64 = age_50_64*population
	gen population_age_65plus = age_65plus*population
	gen population_age_other=age_other*population

// Collapse data to have one observation per county-year
	collapse (sum) population_age_18_29 population_age_30_49 population_age_50_64 population_age_65plus population_age_other, by(year fips_code)

// Create total county population variable
	gen total_county_population = population_age_18_29 + population_age_30_49 + population_age_50_64 + population_age_65plus + population_age_other

// Create share of each age in population variables
	gen pop_share_age_18_29 = population_age_18_29/total_county_population
	gen pop_share_age_30_49 = population_age_30_49/total_county_population
	gen pop_share_age_50_64 = population_age_50_64/total_county_population
	gen pop_share_age_65plus = population_age_65plus/total_county_population
	drop population_age_other
	tempfile SEER_Population_age
	save `SEER_Population_age'

// Merge with sex and race data 
	merge 1:1 year fips_code total_county_population using `SEER_Population_race'
	drop _merge 
	merge 1:1 year fips_code total_county_population using `SEER_Population_sex'
	drop _merge 
	save "$pathi\SEER_Population.dta", replace

// Rename and save version for buyer_fips
	rename fips_code buyer_fips
	save "$pathi\buyer_SEER_Population.dta", replace
	
	
********************************************************************************	
*  All shares - SEER, education, election
********************************************************************************	
// This code combines the election shares, educational shares, and SEER demographic shares for each county	

// Load election data, clean a little, save temp file
	u "$pathi\election_EC.dta", clear 
	rename electionyear year
	keep if inrange(year, 2004, 2016)
	drop county
	tempfile election
	save `election'

// Import education data
	u "$pathi\buyer_countyeducation.dta", clear

// Merge with SEER data 
	merge 1:1 year buyer_fips using "$pathi\buyer_SEER_Population.dta"
	keep if inrange(year, 2004, 2016)
	rename _merge seer_acs_merge

// Merge with election data 
	merge 1:1 buyer_fips year using `election'
	drop if _merge==2 
	rename _merge election_merge 

// Make gop and dem shares equal to most recent election year results 
	* Make gop, dem, ind share variables for each election year
	forval y = 2004(4)2016 {
		gen gopshare_`y'=gopshare if year==`y'
		gen demshare_`y'=demshare if year==`y'
		gen indshare_`y'=indshare if year==`y'
	}
	
	* Make max values apply to all the in between years
	foreach var in gopshare_2004 gopshare_2008 gopshare_2012 gopshare_2016 demshare_2004 demshare_2008 demshare_2012 demshare_2016 indshare_2004 indshare_2008 indshare_2012 indshare_2016 {
		bysort buyer_fips: egen max=max(`var')
		replace `var'=max
		drop max
	}

	* Apply year variable values to the single share variable for in between years
	foreach p in gop dem ind{
		replace `p'share=`p'share_2004 if inrange(year, 2005, 2007)
		replace `p'share=`p'share_2008 if inrange(year, 2009, 2011)
		replace `p'share=`p'share_2012 if inrange(year, 2013, 2015)
	}
	

// Drop 2004 - want 2005 and later
	keep if inrange(year, 2005, 2016)

// Keep relevant variables
	keep buyer_fips year share_hs_or_less share_some_col share_bachelors share_post_grad pop_share_age_18_29 pop_share_age_30_49 pop_share_age_50_64 pop_share_age_65plus pop_share_white pop_share_Black pop_share_other pop_share_Asian pop_share_Hispanic pop_share_female pop_share_male demshare gopshare indshare
	sort buyer_fips
	order buyer_fips year
	save "$pathi\county_2005_2016.dta", replace

********************************************************************************	
*  Unempoyment Rates
********************************************************************************
// This data on county-level unemployment rates comes from the BLS 
	* https://www.bls.gov/lau/tables.htm (Scroll down to Annual Average Data --> County tables)
	
// Import excel files and clean them to be usable, naming variables
forval year=1(1)16 {
	import excel "$pathr\laucnty`year'.xlsx", firstrow clear
	ren B state_fips 
	ren C county_fips 
	ren E year
	ren J unemp_rate 
	gen n=_n
	drop if n<=5
	drop n
	destring unemp_rate year, replace force

	* Create FIPS code variable that combines state and county FIPS 
	gen cfips_length = strlen(county_fips)
	gen fips_code=state_fips + "00" + county_fips if cfips_length==1
	replace fips_code=state_fips + "0" + county_fips if cfips_length==2
	replace fips_code=state_fips + county_fips if cfips_length==3
	destring fips_code, replace force

	* Keep relevant variables, save temp file
	keep year fips_code unemp_rate 
	tempfile unemp`year'
	save `unemp`year''
}

// Append all years together and save
	clear 
	forval year=1(1)16 {
		append using `unemp`year''
	}
	ren fips_code buyer_fips
	drop if buyer_fips==.
	save "$pathi\unemp_2001_2016.dta", replace

********************************************************************************	
*  Test Scores
********************************************************************************
// Test score data comes from the Stanford Education Data Archive and can be downloaded here: https://edopportunity.org/get-the-data/

// Use and keep relevant years
	use "$pathr\seda_county_long_gcs_4.1.dta", clear
	keep if year <=2016

// Create weight for number of people in grade-subject-year cohort 
	rename sedacounty fips_code 
	bysort fips_code year: egen nstudents=sum(totgyb_all)
	gen share_students=totgyb_all/nstudents 
	
// Create average reading and math score by county year 
	bysort fips_code year: egen mean_test_score=sum(gcs_mn_all*share_students)

// Keep relevant variables amd save
	keep fips_code year mean_test_score
	duplicates drop fips_code year, force
	rename fips_code buyer_fips
	drop if buyer_fips==.
	save "$pathi\tests_2009_2016.dta", replace

********************************************************************************	
*  Crime
********************************************************************************
// This file contains the yearly total number of arrests at the county level from the Uniform Crime Reporting (UCR) Program Data. The file, called "Arrests by Age, Sex, and Race, 1974-2021", was accessed through OpenICPSR using Jacob Kaplan's unzipped concatenated files. 
	* https://www.openicpsr.org/openicpsr/project/102263/version/V15/view?path=/openicpsr/102263/fcr:versions/V15/ucr_arrests_monthly_all_crimes_race_sex_1974_2020_dta.zip&type=file
	
// Loop through each year's file and clean
forval year=2001(1)2016 {
	use "$pathr\ucr_arrests_monthly_all_crimes_race_sex_`year'.dta", clear
	destring fips_state_county_code, replace force
	ren fips_state_county_code fips_code 


	* Add duplicate months data 
	foreach var in agg_assault_tot_arrests all_other_tot_arrests arson_tot_arrests curfew_loiter_tot_arrests disorder_cond_tot_arrests drunkenness_tot_arrests dui_tot_arrests embezzlement_tot_arrests family_off_tot_arrests forgery_tot_arrests fraud_tot_arrests gamble_bookmake_tot_arrests gamble_lottery_tot_arrests gamble_other_tot_arrests liquor_tot_arrests manslaught_neg_tot_arrests mtr_veh_theft_tot_arrests murder_tot_arrests oth_assault_tot_arrests oth_sex_off_tot_arrests poss_cannabis_tot_arrests poss_drug_total_tot_arrests poss_heroin_coke_tot_arrests poss_other_drug_tot_arrests poss_synth_narc_tot_arrests prostitution_tot_arrests rape_tot_arrests robbery_tot_arrests runaways_tot_arrests sale_cannabis_tot_arrests sale_drug_total_tot_arrests sale_heroin_coke_tot_arrests sale_other_drug_tot_arrests sale_synth_narc_tot_arrests stolen_prop_tot_arrests suspicion_tot_arrests theft_tot_arrests total_drug_tot_arrests vagrancy_tot_arrests vandalism_tot_arrests weapons_tot_arrests {
		bysort fips_code: egen s_`var'=sum(`var')
	}

	* Keep only total variables 
	keep year fips_code s_*
	duplicates drop fips_code, force 

	* Create yearly total arrests variable 
	gen total_arrests=s_agg_assault_tot_arrests + s_all_other_tot_arrests + s_arson_tot_arrests + s_curfew_loiter_tot_arrests + s_disorder_cond_tot_arrests + s_drunkenness_tot_arrests + s_dui_tot_arrests + s_embezzlement_tot_arrests + s_family_off_tot_arrests + s_forgery_tot_arrests + s_fraud_tot_arrests + s_gamble_bookmake_tot_arrests + s_gamble_lottery_tot_arrests + s_gamble_other_tot_arrests + s_liquor_tot_arrests + s_manslaught_neg_tot_arrests + s_mtr_veh_theft_tot_arrests + s_murder_tot_arrests + s_oth_assault_tot_arrests + s_oth_sex_off_tot_arrests + s_poss_cannabis_tot_arrests + s_poss_drug_total_tot_arrests + s_poss_heroin_coke_tot_arrests + s_poss_other_drug_tot_arrests + s_poss_synth_narc_tot_arrests + s_prostitution_tot_arrests + s_rape_tot_arrests + s_robbery_tot_arrests + s_runaways_tot_arrests + s_sale_cannabis_tot_arrests + s_sale_drug_total_tot_arrests + s_sale_heroin_coke_tot_arrests + s_sale_other_drug_tot_arrests + s_sale_synth_narc_tot_arrests + s_stolen_prop_tot_arrests + s_suspicion_tot_arrests + s_theft_tot_arrests + s_total_drug_tot_arrests + s_vagrancy_tot_arrests + s_vandalism_tot_arrests + s_weapons_tot_arrests

	* Save temp file
	tempfile crime`year'
	save `crime`year''
}

// Append all years
	clear 
	forval year=2001(1)2016 {
		append using `crime`year''
	}
	
// Keep relevant vars, drop missing counties, save
	keep fips_code year total_arrests 
	drop if fips_code==.
	save "$pathi\crime_2001_2016.dta", replace
	
	
********************************************************************************	
*  New Housing Units from Building Permits
********************************************************************************
// Building permits comes from the Building Permits Survey from the Census Bureau downloaded here https://www2.census.gov/econ/bps/County/ (we use the co<YYYY>a.txt files)

* Note - save the files as buildYYYY.txt

// Loop over each year to clean and create proper county variables
forval year=2001(1)2016 {
	import delimited "$pathr\build`year'.txt", delimiter(comma) varnames(2) clear

	* Drop first row of data that's empty
	gen n=_n
	drop if n==1
	drop n

	* Drop unneccessary variables 
	drop v19 v20 v21 v22 v23 v24 v25 v26 v27 v28 v29 v30
	destring date, replace 
	ren date year

	* Create county FIPS variable that combines state and county codes
	tostring(state), replace
	tostring(county), replace
	gen cfips_length = strlen(county)
	gen fips_code=state + "00" + county if cfips_length==1
	replace fips_code=state + "0" + county if cfips_length==2
	replace fips_code=state + county if cfips_length==3
	destring fips_code, replace force

	* Rename vars 
	ren v10 bldgs2
	ren v11 units2
	ren v12 value2
	ren v13 bldgs3 
	ren v14 units3
	ren v15 value3
	ren v16 bldgs4
	ren v17 units4
	ren v18 value4

	* Sum building vars 
	gen bldgs_all=bldgs+bldgs2+bldgs3+bldgs4 
	gen units_all=units+units2+units3+units4 
	gen value_all=value+value2+value3+value4

	keep year fips_code bldgs_all units_all value_all 
	tempfile build`year'
	save `build`year''
}

// Append all years 
	clear 
	forval year=2001(1)2016 {
		append using `build`year''
	}

// Drop missing counties and duplicates, save
	drop if fips_code==.
	duplicates drop fips_code year, force
	save "$pathi\build_2001_2016.dta", replace


********************************************************************************	
*  Flood Events 2001-2016
********************************************************************************
// This file is created from NOAA's Storm Events Database to calculate the number of flood events each year, lagged by one year to control for the previous year's flood events. 
// We use the "StormEvents\_details-ftp" files from https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/

* Note - the raw folders for each year are zipped; they will need to be unzipped and saved as "yYYYY" for the following code to run

// Loop through years to clean FIPS codes and flood data
forval year=2000(1)2016{
	import delimited "$pathr\y`year'\StormEvents_details-ftp_v1.0_d`year'_c20220425.csv", delimiter(comma) varnames(1) clear 
		* note - the last part of this file name will change based on download date, so either change the file names or change the code to match yours

	* Create county FIPS code from state and county fips
	tostring cz_fips state_fips, replace 
	gen cfips_length = strlen(cz_fips)
	gen fips_code=state_fips + "00" + cz_fips if cfips_length==1
	replace fips_code=state_fips + "0" + cz_fips if cfips_length==2
	replace fips_code=state_fips + cz_fips if cfips_length==3
	destring fips_code, replace force

	* Keep only flooding events 
	keep if event_type=="Coastal Flood" | event_type=="Flash Flood" | event_type=="Flood" | event_type=="Lakeshore Flood" | event_type=="Storm Surge/Tide" 

	* Sum number of flooding events for each county 
	gen one=1
	bysort fips_code: egen prev_floods=sum(one)

	* Keep relevant variables 
	keep fips_code year prev_floods 
	duplicates drop fips_code, force
	tempfile flood`year'
	save `flood`year''
}

// Append all years 
	clear 
	forval year=2000(1)2016 {
		append using `flood`year''
	}

// Lag the flood events one year and save
	replace year=year+1
	drop if year==2017
	save "$pathi\flood_2001_2016.dta", replace
